# Source page for the scraped table (NY State Dept. of Health vital
# statistics, 2019, table 23).
url <- "https://www.health.ny.gov/statistics/vital_statistics/2019/table23.htm"

# Fetch and parse the page, extract its HTML tables without promoting any row
# to a header (header = FALSE), keep only the first table, and normalise the
# column names with janitor. Later steps select columns by position.
induced_abortion <-
  url %>%
  read_html() %>%
  html_table(header = FALSE) %>%
  first() %>%
  janitor::clean_names()
# Data cleaning: Non-Hispanic (NH) columns
# Build the Non-Hispanic (NH) table from the scraped `induced_abortion` data.
#
# Columns are picked by position (1, 3, 5, 7, 9, 11) and renamed; rows are
# picked by position (4 and 6:11).
# NOTE(review): the positional row/column choices assume the page layout of
# the 2019 table (state row plus NYC/borough rows) -- confirm if the source
# page changes.
#
# Result: `borough` (character) plus five numeric columns, also written to
# data/abortion_race_nh.csv.
clean_nh <-
  induced_abortion %>%
  select(1, 3, 5, 7, 9, 11) %>%
  purrr::set_names(
    c(
      "borough", "total", "Non_Hispanic_Ratio", "NH_White_Only_Ratio",
      "NH_Black_Only_Ratio", "NH_Other_Ratio"
    )
  ) %>%
  slice(4, 6:11) %>%
  mutate(
    # Map county names to borough names.
    borough = str_replace(borough, "Kings", "Brooklyn"),
    # Exact match on purpose: a pattern replace would also rewrite any longer
    # label that merely contains "New York".
    borough = ifelse(borough == "New York", "Manhattan", borough),
    borough = str_replace(borough, "Richmond", "Staten Island"),
    # Strip *every* thousands separator before converting. Removing only the
    # first comma would leave values such as "1,234,567" unparseable and
    # as.numeric() would silently turn them into NA.
    across(
      total:NH_Other_Ratio,
      function(x) as.numeric(str_remove_all(x, ","))
    )
  )

write_csv(clean_nh, file = "data/abortion_race_nh.csv")
# Data cleaning: Hispanic (H) columns
# Build the Hispanic (H) table from the scraped `induced_abortion` data.
#
# Columns are picked by position (1, 3, 13, 15, 17) and renamed; rows are
# picked by position (4 and 6:11).
# NOTE(review): the positional row/column choices assume the page layout of
# the 2019 table (state row plus NYC/borough rows) -- confirm if the source
# page changes.
#
# Result: `borough` (character) plus four numeric columns, also written to
# data/abortion_race_h.csv.
clean_h <-
  induced_abortion %>%
  select(1, 3, 13, 15, 17) %>%
  purrr::set_names(
    c(
      "borough", "total", "Hispanic_Ratio", "H_White_Only_Ratio",
      "H_Black_Only_Ratio"
    )
  ) %>%
  slice(4, 6:11) %>%
  mutate(
    # Map county names to borough names.
    borough = str_replace(borough, "Kings", "Brooklyn"),
    # Exact match on purpose: a pattern replace would also rewrite any longer
    # label that merely contains "New York".
    borough = ifelse(borough == "New York", "Manhattan", borough),
    borough = str_replace(borough, "Richmond", "Staten Island"),
    # Strip *every* thousands separator before converting. Removing only the
    # first comma would leave values such as "1,234,567" unparseable and
    # as.numeric() would silently turn them into NA.
    across(
      total:H_Black_Only_Ratio,
      function(x) as.numeric(str_remove_all(x, ","))
    )
  )

write_csv(clean_h, file = "data/abortion_race_h.csv")
# Plotly bar chart: induced abortion vs race (Non-Hispanic)
# Long-format view of the first row of `clean_nh` only: one row per value
# column, with the column name in `race` and its value in `abortion`.
# NOTE(review): `total` is pivoted here as well, unlike the Hispanic
# counterpart, which starts at its first ratio column -- confirm intended.
total_abortion_nhrace <-
  clean_nh %>%
  slice_head(n = 1) %>%
  pivot_longer(
    cols = total:NH_Other_Ratio,
    names_to = "race",
    values_to = "abortion"
  )

# Interactive bar chart of the Non-Hispanic ratio columns: drop `total`,
# reshape to long form, then draw bars by race category coloured by borough.
abortion_race_nhplot <-
  clean_nh %>%
  select(-total) %>%
  pivot_longer(
    cols = Non_Hispanic_Ratio:NH_Other_Ratio,
    names_to = "race",
    values_to = "abortion"
  ) %>%
  plot_ly(
    x = ~race,
    y = ~abortion,
    color = ~borough,
    type = "bar",
    colors = "viridis"
  ) %>%
  layout(
    title = "Abortion Ratios by Non-Hispanic Race for Boroughs and New York State",
    yaxis = list(title = "Number of Induced Abortions per 1,000 Live Births")
  )

# Print the widget so it renders when the script/chunk is run.
abortion_race_nhplot
# Plotly bar chart: induced abortion vs race (Hispanic)
# Long-format view of the first row of `clean_h` only: the three Hispanic
# ratio columns become (`race`, `abortion`) pairs; `borough` and `total`
# stay as ordinary columns.
total_abortion_hrace <-
  clean_h %>%
  slice_head(n = 1) %>%
  pivot_longer(
    cols = Hispanic_Ratio:H_Black_Only_Ratio,
    names_to = "race",
    values_to = "abortion"
  )

# Interactive bar chart of the Hispanic ratio columns: drop `total`, reshape
# to long form, then draw bars by race category coloured by borough.
abortion_race_h <-
  clean_h %>%
  select(-total) %>%
  pivot_longer(
    cols = Hispanic_Ratio:H_Black_Only_Ratio,
    names_to = "race",
    values_to = "abortion"
  ) %>%
  plot_ly(
    x = ~race,
    y = ~abortion,
    color = ~borough,
    type = "bar",
    colors = "viridis"
  ) %>%
  layout(
    title = "Abortion Ratios by Hispanic Race for Boroughs and New York State",
    yaxis = list(title = "Number of Induced Abortions per 1,000 Live Births")
  )

# Print the widget so it renders when the script/chunk is run.
abortion_race_h